
************** Overview *****************
*DES: 11/23/18
* Merges in data, cleans up the file, makes summary stats
***********************************************


* ---------------------------------------------------------------
* Session setup: disable paging, empty the session, raise limits,
* close any open log, and set the per-user switches.
* ---------------------------------------------------------------
set more off

* Empty data, Mata and matrices (the three clears are independent)
clear
clear mata
clear matrix

* Capacity settings. NOTE(review): -set mem- and -set matsize- are
* documented as obsolete in recent Stata releases - confirm which
* Stata version this file must run on before removing them.
set mem 10000m
set maxvar 32767
set matsize 11000

capture log close

* User switches: exactly one should be 1. Only the Claudia branch is
* visible in this file.
local Claudia 0
local David 1
local Jenni 0

** Claudia's globals and input data
* Runs only when the local `Claudia' is 1. Defines the path globals
* (home, output, samples, schvar), loads the working extract, and
* builds the *_test downwind indicator and the behaviour-incident flag.
if `Claudia' == 1 {
	global home "C:\Florida projects\dissertation\schools wind pollution"
	global output "C:\Users\Claudia Persico\Dropbox\Research on Florida Wind Patters\School wind pollution\output\whole sample results"
	global samples "$home\samples"
	global schvar "C:\Users\Claudia Persico\Dropbox\Research on Florida Wind Patters\data\School Data\School Location"

	* Earlier extracts, kept for reference
	*use "$samples\FLschoolswindroadsid2010ncesfsirnewaadt4mi51918.dta", clear
	*use "$samples\FLschoolswindroadsid2010ncesfsirnewaadt4mi18k52118.dta", clear
	*use "$samples\FLschoolswindroadsid2010ncesfsirnewaadt4mi18k53118.dta", clear
	*use "$samples\FLschoolswindroadsid2010ncesfsirnewaadt4mi18k6618.dta", clear
	*use "$samples\FLschoolswindroadsid2010ncesfsirnewaadt4mi18k7618.dta", clear

	* -use- takes -clear-, not -replace-; with -replace- the command
	* errors out and no data are loaded.
	use "$samples\FLschoolswindroadsid2010ncesfsirnewaadt4mi18k112118.dta", clear

	* 1 when dwind_any_win4_test is at least 0.6. Missing compares as
	* larger than any number, so the next line resets rows with a missing
	* dwind value, or more than 0.4 from the major highway, to missing.
	gen down60_any_mjrhwy_4_test = (dwind_any_win4_test >= .6)
	replace down60_any_mjrhwy_4_test = . if mi_to_nid1_mjrhwy > .4 | dwind_any_win4_test == .

	rename windtreat1_test dwintensity4h_test
	rename incident behaveincident
	* Force the flag to 1 whenever a positive, nonmissing incident count exists
	replace behaveincident = 1 if nincident > 0 & nincident != .
}

* Open a dated log for this run (overwrites a same-day log)
log using "$output\setuprdcetestwk_$S_DATE.log", replace

* Whole-sample averages: any value above 1 is divided by 100
* (presumably percentages being converted to shares - confirm).
foreach v in TeachDegree stability FRL {
	replace `v' = `v'/100 if `v' > 1
}

* NOTE(review): the summary uses lowercase frl while the rescale above
* uses FRL; Stata names are case sensitive - confirm both exist.
summarize size TeachDegree frl stability momedbyschool mommarried

******************** distance of the move
* Keep the coordinates already in memory under *orig names, then bring
* in latcod/loncod from the NCES 2010 file by ncessch.
rename latcod latcodorig
rename loncod loncodorig

merge m:1 ncessch using "$schvar/NCES_2010.dta", keepusing(latcod loncod)
drop _merge

* Previous-row coordinates within student (rows ordered by year)
foreach c in lat lon {
	bysort id (year): gen `c'codlast = `c'cod[_n-1]
}

* Distance between this row's and the previous row's school coordinates
* (geodist is a user-written command; -mi- requests miles)
geodist latcod loncod  latcodlast loncodlast, gen(move_dist) mi
label var move_dist "Distance of school move"
drop latcodlast loncodlast

*** Limiting the data
* Keep rows within 0.4 of at least one of the listed road measures
* (a missing distance never satisfies <=0.4).
keep if mi_to_nid1_aadt25k <= 0.4 | mi_to_nid_aadt48 <= 0.4 | mi_to_nid_IS <= 0.4 ///
	| mi_to_nid_USH <= 0.4 | mi_to_nid1_mjrhwy <= 0.4

* Drop schools whose name contains any of these substrings.
* strpos() is case sensitive, so this assumes schnam is upper case.
foreach kw in "VIRTUAL" "ONLINE" "ADULT" "AMIKIDS" "AMI KIDS" "JAIL" "JUVENILE" ///
	"TRANSITION" "HALFWAY" "LIFE SKILLS" "DROPOUT" "HOSPITAL" "PARENT" ///
	"DETENTION" "SUMMER" "ADDICTIONS" "PACE" "SEAL CENTER" "DROP BACK IN" {
	drop if strpos(schnam, "`kw'")
}

foreach kw in "BLIND" "DEAF" "SPECIAL NEEDS" {
	drop if strpos(schnam, "`kw'")
}

* NOTE(review): "THE PORT ACACEMY" looks like a typo for "ACADEMY";
* as written it only matches if the name is misspelled in the data.
foreach kw in "CITRUS HEALTH" "HIGHLAND PARK SIPP" "GULF/LAKE ACADEMY" ///
	"THE PORT ACACEMY" "E SEAL" "SHERIFF" {
	drop if strpos(schnam, "`kw'")
}
 
describe id

sort id year

* First year observed for each student
by id: egen firstyear = min(year)

******************** descriptive statistics on changing schools
* changeschool = 1 when the school differs from the previous row of the
* same student, the school is not missing, and the row is not the
* student's first year; 0 otherwise.
generate changeschool = 0
by id: replace changeschool = 1 if school != school[_n-1] & school != . & year != firstyear

* Number of school changes per student
by id: egen totalchanges = total(changeschool)

tabulate grade changeschool
tabulate totalchanges

tabulate down60any, missing

* IMPUTING THE DOWNWIND VARIABLES BY SCHOOL FOR ANY THAT ARE MISSING
* DES note: in the 5% sample I don't think the first set of code does anything
* Each new variable is the within-school maximum of its source variable.
sort school
by school: egen down60_4 = max(down60any)
foreach rd in IS USH {
	by school: egen down_`rd'4 = max(down_`rd'_5)
}
foreach k in 48 25 18 {
	by school: egen down_aadt`k'_4 = max(down_aadt`k'_5)
}

* Blank the school-level value when the matching distance is known and
* above 0.4; rows with a missing distance keep their value.
foreach rd in IS USH {
	replace down_`rd'4 = . if mi_to_nid_`rd' > 0.4 & mi_to_nid_`rd' != .
}
foreach k in 48 25 18 {
	replace down_aadt`k'_4 = . if mi_to_nid_aadt`k' > 0.4 & mi_to_nid_aadt`k' != .
}

summarize mi_to_nid_IS if down_IS4 ~= .

summarize down60*


* DOWNWIND OF ANY ROAD WITH AADT>=25000
* Row maximum over the IS, USH, AADT48 and AADT25 indicators
egen downalt60_4 = rowmax(down60IS_4 down60USH_4 down60aadt48_4 down60aadt25_4)

* Downwind of AADT>=48000: row maximum over IS, USH and AADT48
egen down60_48ormr_4 = rowmax(down60IS_4 down60USH_4 down60aadt48_4)

tabulate down60_48ormr_4 downalt60_4, missing

* Row maximum over IS and USH only
egen down60_USormr_4 = rowmax(down60IS_4 down60USH_4)

* For each road class: take the within-school maximum of the wind
* measure, blank it where the distance exceeds 0.4 and the row's own
* wind value is nonmissing, then assert that every remaining value
* sits within 0.4.
* NOTE(review): the blanking step tests wind`rd', not wind`rd'_4, so a
* value imputed from another row of the same school is not blanked and
* would trip the assert - confirm this is intended.
foreach rd in aadt25 aadt48 IS USH {
	bysort school: egen wind`rd'_4 = max(wind`rd')
	replace wind`rd'_4 = . if mi_to_nid_`rd' > .4 & wind`rd' ~= .
	assert mi_to_nid_`rd' <= .4 if wind`rd'_4 ~= .
}

* Assign wind intensity between interstates and highways.
* First look at the cases where both measures are present.
summarize windIS_4 if windUSH_4 ~= .

summarize windUSH_4 if windIS_4 ~= .

* Combined intensity: the larger of the two when both exist, otherwise
* whichever one is present. rowmax() ignores missing arguments and
* returns missing only when both are missing, so one call covers all
* three cases.
egen windISUSH = rowmax(windIS_4 windUSH_4)

* Combined distance to road: per-road distances kept only when <=0.4
generate mi_to_nid_IS_4 = mi_to_nid_IS if mi_to_nid_IS <= 0.4 & mi_to_nid_IS ~= .
generate mi_to_nid_USH_4 = mi_to_nid_USH if mi_to_nid_USH <= 0.4 & mi_to_nid_USH ~= .

* Nearest of the two (rowmin() also ignores missing arguments)
egen mi_toISUSH_4 = rowmin(mi_to_nid_USH_4 mi_to_nid_IS_4)

* Now create distance bins.
* Eight indicators splitting mi_toISUSH_4 over [0, 0.4] into bands of
* width 0.05; each is missing where mi_toISUSH_4 is missing. The last
* band is closed on the right so that 0.4 itself is included.
gen mi1 = (mi_toISUSH_4>=0 & mi_toISUSH_4<0.05) if mi_toISUSH_4~=.
gen mi2 = (mi_toISUSH_4>=0.05 & mi_toISUSH_4<0.1) if mi_toISUSH_4~=.
gen mi3 = (mi_toISUSH_4>=0.1 & mi_toISUSH_4<0.15) if mi_toISUSH_4~=.
gen mi4 = (mi_toISUSH_4>=0.15 & mi_toISUSH_4<0.2) if mi_toISUSH_4~=.
gen mi5 = (mi_toISUSH_4>=0.2 & mi_toISUSH_4<0.25) if mi_toISUSH_4~=.
* mi6 previously repeated the 0.30-0.35 range of mi7, leaving the
* 0.25-0.30 band without an indicator.
gen mi6 = (mi_toISUSH_4>=0.25 & mi_toISUSH_4<0.3) if mi_toISUSH_4~=.
gen mi7 = (mi_toISUSH_4>=0.3 & mi_toISUSH_4<0.35) if mi_toISUSH_4~=.
gen mi8 = (mi_toISUSH_4>=0.35 & mi_toISUSH_4<=0.4) if mi_toISUSH_4~=.

* mi* matches every variable whose name starts with "mi", so this also
* summarizes the mi_to_* distance variables, not just the eight bins.
sum mi*

* Interact each exposure measure with each of the eight distance bins
foreach expo in windISUSH down60_USormr_4 {
	forvalues b = 1/8 {
		generate `expo'mi`b' = `expo' * mi`b'
	}
}
summarize windISUSHmi* down60_USormr_4mi*

* Inverse distance: 0.4 minus the combined distance, so closer rows
* get larger values
generate invdis = 0.4 - mi_toISUSH_4 if mi_toISUSH_4 ~= .

* Interact inverse distance with wind exposure
generate down60_invdis = down60_USormr_4 * invdis
generate windISUSH_invdis = windISUSH * invdis

summarize invdis down60_invdis windISUSH_invdis


******* additional RHS variables

* Row mean of the two standardized FCAT scores (rowmean() uses
* whichever scores are nonmissing)
egen avgfcat = rowmean(stdfcatread stdfcatmath)

* School-by-year mean of hisp
sort school year
by school year: egen percenthispanic = mean(hisp)

* DES: a step converting student id to numeric was needed in the full
* sample (and for Claudia's code) but is not needed in the 5% sample.


*****code up mover
* Builds three indicators from consecutive rows of the same student:
*   mover    - school differs from the previous row
*   mover2yr - mover was 1 on the previous row and is 0 on this row
*   mover3yr - mover2yr was 1 on the previous row and is 0 on this row
*              (only evaluated where mover is not 1)
* yearnew is a within-student row counter ordered by year, used as the
* panel time variable and dropped at the end.
bysort id (year): gen yearnew = _n
sort id yearnew
xtset id yearnew

* The [_n-1] subscripts below refer to the previous row of the dataset,
* not of the panel, so the first row of each id looks at another id's
* last row; those first rows are overwritten with 0 afterwards.
gen mover = school - school[_n-1] 
* Any nonzero difference becomes 1. A missing difference is also ~=0,
* so rows with a missing school (current or previous) are coded 1.
replace mover = 1 if mover~=0 
bysort id (year): replace mover= 0 if _n==1 // don't count the first year as a move

*now create an indicator if moved last year

* Difference is 1 (moved last row, not this row), 0 (no change) or
* -1 (did not move last row, moved this row); -1 is recoded to 0.
gen mover2yr = mover[_n-1]-mover
replace mover2yr=0 if mover2yr==-1
bysort id (year): replace mover2yr= 0 if _n==1 // don't count the first year as a move



* Same differencing applied to mover2yr, skipped where mover==1;
* skipped (missing) rows and -1 are both recoded to 0.
gen mover3yr = mover2yr[_n-1]-mover2yr if mover~=1
replace mover3yr=0 if mover3yr==-1 | mover3yr==.
bysort id (year): replace mover3yr= 0 if _n==1 // don't count the first year as a move


drop yearnew

* Values above 1 are divided by 100 (presumably percentages being
* converted to shares - confirm). TeachDegree, stability and FRL were
* already rescaled once earlier in this file.
foreach v in PercAbsent TeachDegree stability FRL {
	replace `v' = `v'/100 if `v' > 1
}


* Imputing the data from adjacent cells.
* Within school (rows ordered by year): fill a missing value from the
* previous row, then from the next row if still missing, then give
* every row of a school-year the maximum value in that school-year.
sort school year
foreach v in TeachDegree size stability FRL PercAbsent21days {
	by school: replace `v' = `v'[_n-1] if `v' == .
	by school: replace `v' = `v'[_n+1] if `v' == .
	by school year: egen `v'2 = max(`v')
	drop `v'
	rename `v'2 `v'
}


* Create grade repetition variable: 1 when the student's next row has
* the same grade as this row, 0 otherwise.
* NOTE(review): on a student's last row grade[_n+1] is missing, so the
* result is 0 for a nonmissing grade and 1 for a missing grade -
* confirm that is the intended coding for the final year.
sort id year
by id: generate graderep = (grade == grade[_n+1])

* Make district number variable for district FEs (within-district
* maximum of DistrictNumber, i.e. a copy of the district number)
bysort DistrictNumber: egen distnum = max(DistrictNumber)

*Other new highway variables
* NOTE(review): this section reads mi_to_nid_IS4 / mi_to_nid_USH4 (no
* underscore before the 4). The variables generated earlier in this
* file are mi_to_nid_IS_4 / mi_to_nid_USH_4, so these presumably come
* from the input data - confirm they exist and are the intended ones.
* mitohwy: smaller of the two highway distances (rowmin ignores missing)
egen mitohwy=rowmin(mi_to_nid_IS4 mi_to_nid_USH4)

*Assign percent of time downwind for highways
*g down_hwy4=down_IS4 if mi_to_nid_IS4<=0.4
*replace down_hwy4=down_USH4 if down_hwy4==. &  mi_to_nid_USH4<=0.4

* down_hwy4: the larger of the IS and USH values when both roads are
* within 0.4; otherwise the value for whichever road is within 0.4;
* 0 when the nearest highway is beyond 0.4 or when no value was
* assigned but a highway distance is known.
egen down_hwy4=rowmax(down_IS4 down_USH4) if mi_to_nid_IS4<=0.4 & mi_to_nid_USH4<=0.4
replace down_hwy4=down_IS4 if down_hwy4==. &  mi_to_nid_IS4<=0.4
replace down_hwy4=down_USH4 if down_hwy4==. &  mi_to_nid_USH4<=0.4
replace down_hwy4=0 if (mitohwy>0.4 & mitohwy!=.) | (down_hwy4==. & mitohwy!=.)


*Downwind intensity for highways
* IS intensity takes priority; USH intensity is used only where the IS
* value is still missing and the USH road is within 0.4.
gen dwintensity4h = windIS_4 if mi_to_nid_IS4<=0.4
replace dwintensity4h=windUSH_4 if dwintensity4h==. & mi_to_nid_USH4<=0.4
	label var dwintensity4h "Mean of intensity hwy, 0.1=10% from upwind"

* Within-school maxima of the *_5 downwind variables, without any
* distance trimming
bysort school: egen down_IS5=max(down_IS_5)
bysort school: egen down_USH5=max(down_USH_5)
	
*percent of time downwind if downwind intensity >0.5
*g down_hwyintense=down_hwy4 if dwintensity4h>0.5

*just downwind of highways
* 1 when either road is within 0.4 and its downwind value is >=0.6;
* 0 for other rows that have a nonmissing dwintensity4h; missing otherwise.
g downwindmoresixty4h=1 if down_IS4>=0.6 & down_IS4~=. & mi_to_nid_IS4<=0.4 
replace downwindmoresixty4h=1 if down_USH4>=0.6 & down_USH4~=. & mi_to_nid_USH4<=0.4 
replace downwindmoresixty4h=0 if downwindmoresixty4h!=1 & dwintensity4h!=.

*within 0.5 miles of a highway
g closetohwy5=1 if mitohwy<=0.5 
replace closetohwy5=0 if mitohwy>0.5 & mitohwy!=.

* Same indicator with a 0.75 cutoff
g closetohwy75=1 if mitohwy<=0.75 
replace closetohwy75=0 if mitohwy>0.75 & mitohwy!=.

* Within 0.5 of a highway and downwind >=0.6.
* NOTE(review): the last line sets 0 whenever EITHER down_IS5 or
* down_USH5 is below 0.6, which overwrites a 1 assigned above from the
* other road. The 0.4 version below instead zeroes only rows where
* neither road qualified - confirm which behaviour is intended.
g closetohwydwind60_5=1 if down_IS5>=0.6 & down_IS5~=. & mitohwy<=0.5 
replace closetohwydwind60_5=1 if down_USH5>=0.6 & down_USH5~=. & mitohwy<=0.5
replace closetohwydwind60_5=0 if (mitohwy>0.5 & mitohwy!=.) | down_IS5<0.6 | down_USH5<0.6 

* Within 0.4 of a highway and downwind >=0.6; 0 when the nearest
* highway is beyond 0.4 or downwindmoresixty4h is 0.
g closetohwydwind60_4=1 if down_IS4>=0.6 & down_IS4~=. & mitohwy<=0.4 
replace closetohwydwind60_4=1 if down_USH4>=0.6 & down_USH4~=. & mitohwy<=0.4
replace closetohwydwind60_4=0 if (mitohwy>0.4 & mitohwy!=.) | downwindmoresixty4h==0

*jenni method variables

* changein6or9: 1 on rows where grade1 is 6 or 9 and changeschool is 1;
* changein6or9id: 1 for every row of a student who has any such row.
g changein6or9 = (grade1==6 | grade1==9) & changeschool==1
bysort id: egen changein6or9id = max(changein6or9)

*road count variables
* rdcnt_win4_mjrhwy2: the 0.4-mile count with a missing count treated
* as 0 whenever the 10-mile count is known.
g rdcnt_win4_mjrhwy2=rdcnt_win4_mjrhwy
replace rdcnt_win4_mjrhwy2=0 if rdcnt_win4_mjrhwy==. & rdcnt_win10_mjrhwy!=.
* Roads counted in the wider window but not in the 0.4-mile window
g rdcnt4to10=rdcnt_win10_mjrhwy - rdcnt_win4_mjrhwy2

*road count fes within 0.4 miles
tab rdcnt_win4_mjrhwy, gen(rdcntdum)

*3 or more roads within 0.4 miles
* Stata treats missing as larger than any number, so
* "rdcnt_win4_mjrhwy>=3" coded every row with a missing count as 1.
* Use the cleaned count and leave the flag missing where that count is
* still unknown.
g rdcntmore3 = rdcnt_win4_mjrhwy2>=3 if rdcnt_win4_mjrhwy2<.

*interaction
g rdcntint=rdcnt_win4_mjrhwy2*down60_any_mjrhwy_4

* Fixing the miles-from-road variable: use the highway distance
* whenever it is smaller (a missing mi_to_nid1_mjrhwy counts as larger
* than any number, so it is filled in as well)
replace mi_to_nid1_mjrhwy = mitohwy if mitohwy < mi_to_nid1_mjrhwy

* Tenth-of-a-mile indicators for distance to the major highway,
* (0,0.1], (0.1,0.2], ..., (0.9,1]
forvalues b = 1/9 {
	generate distmj`b' = mi_to_nid1_mjrhwy>0.`b'-0.1 & mi_to_nid1_mjrhwy<=0.`b'
}
generate distmj10 = mi_to_nid1_mjrhwy>0.9 & mi_to_nid1_mjrhwy<=1

* Distance bins: the same ten bands for mitohwy. The bounds are written
* out as literals 0.0, 0.1, ..., 0.9, then 1 for the last band.
forvalues b = 1/9 {
	local lo = `b' - 1
	generate disthw`b' = mitohwy>0.`lo' & mitohwy<=0.`b'
}
generate disthw10 = mitohwy>0.9 & mitohwy<=1


* dwintensity with >0.4 mi control group: missing intensity becomes 0
* where the nearest highway is known and beyond 0.4
generate dwintensity4hdist = cond(dwintensity4h==. & mitohwy>0.4 & mitohwy!=., 0, dwintensity4h)

* Untrimmed intensity: windIS, falling back to windUSH where windIS is missing
generate dwintensity4h1 = cond(windIS==., windUSH, windIS)

* Reversed distance and its interaction with the untrimmed intensity
generate revmitohwy = 1 - mitohwy
generate dwintensity4hdisti = dwintensity4h1 * revmitohwy

* Major-highway downwind flag: 0 for distances strictly between 0.4
* and 1; otherwise the original flag, with closetohwydwind60_4 filling
* in where the original is missing
generate down60_any_mjrhwy2 = cond(mi_to_nid1_mjrhwy>.4 & mi_to_nid1_mjrhwy<1, 0, ///
	cond(down60_any_mjrhwy_4==., closetohwydwind60_4, down60_any_mjrhwy_4))

* AADT>=25k downwind flag: 0 for distances strictly between 0.4 and 1
generate down60_any_aadt25k2 = cond(mi_to_nid1_aadt25k>.4 & mi_to_nid1_aadt25k<1, 0, down60_any_aadt25k_4)

sort id year

* Control set used in the regression below
global model3 momedbyschool momblackbyschool mommarriedbyschool percenthispanic frl TeachDegree size stability mover distmj1 distmj2 distmj3 rdcnt_win10_mjrhwy

* Save the prepared file before the analysis-sample restrictions
save "$samples\redceclptestwk_32219.dta", replace

* Analysis sample: exclude students whose changein6or9id is 0, and
* keep only rows with grade1 of 5, 6, 8 or 9
keep if changein6or9id != 0 & inlist(grade1, 5, 6, 8, 9)

* NOTE(review): gradedum4 and gradedum7 are not created in this file;
* they presumably come from the input data - confirm.
reghdfe avgfcat down60_any_mjrhwy_4 $model3 gradedum4 gradedum7, absorb(zip id year) vce(cluster id zip)

